##########################################
## "Voters get what they want"          ##
##########################################
## Heinrich, Kobayashi, Long            ##
##########################################

##          
## Analysis of September survey experiment
## Script 11
## June 29, 2017


## Load and prep data from survey experiment
############################################
data <- read.csv2("data/MTurk data from September 2015.txt", quote="",sep="|")
data$Age <- 2015 - data$user_birthyr
data$Gender <- ifelse(data$user_gender == "f", 0, 1)
data$LR <- data$p4_politics
data$HiEdu <- ifelse(data$p4_education >= 5, 1, 0)

data$Time <- as.numeric(as.character(data$p8_time))
data$Failures <- data$p4_screener_failures
data$Rating <- data$p8_support
data$Just <- data$p8_nat_sec_just
data$Costs <- (1 + data$p8_cost) * 25
data <- na.omit(data)
data <- data[, c("Age", "Gender", "LR", "HiEdu", "Rating", "Costs", "Just", "Time", "Failures")]


## Remove people with excessive screener failures/ times
########################################################
## 1,414 observations; 1,386 retained
n <- nrow(data)
data <- subset(data, Time > 5 & Time < 120)
data <- subset(data, Failures <= 2)
n_dropped <- n - nrow(data)


## Some constants to adjust variables
#####################################
age_m <- mean(data$Age)
age_sd <- sd(data$Age)
LR_m <- mean(data$LR)
LR_sd <- sd(data$LR)
data$Age <- (data$Age - age_m) / age_sd
data$LR <- (data$LR - LR_m) / LR_sd


## CCES data for post-stratification; prep it
#############################################
load("output/data_preppedCCES.Rdata")
cces$Age <- (cces$Age - age_m) / age_sd
cces$LR <- (cces$LR - LR_m) / LR_sd


## Calculate data for post-stratification
#########################################
max_theo_cases <- length(unique(data$Gender)) * length(unique(cces$Age)) * length(unique(cces$LR)) * length(unique(cces$HiEdu))
cces_groups <- ddply(.data=cces, .variables=c("Gender", "Age", "LR", "HiEdu"),
                     .fun=function(x) data.frame(w_raw=sum(x$weight)))
## Add random "data" so that we can use the same formula function
cces_groups$Rating <- rnorm(nrow(cces_groups))
cces_groups$Costs <- sample(c(25, 50, 75), size=nrow(cces_groups), replace=TRUE)
cces_groups$Just <- sample(c(0, 1, 2), size=nrow(cces_groups), replace=TRUE)
 

## Estimate the two models 
###########################
formulas <- c(Rating ~ 1 + I(Just == 1) + I(Just == 2) + I(Costs == 25) + I(Costs == 75) ,
              Rating ~ 1 + I(Just == 1) + I(Just == 2) + I(Costs == 25) + I(Costs == 75) + Age + Gender + HiEdu + LR)

output <- vector("list", 2)
for(i in 1:length(formulas))
{
  if(i == 1)
  {
    X <- model.matrix(frml=formulas[1][[1]], data=data)
    Y <- data$Rating
  }
  
  if(i == 2)
  {
    X <- model.matrix(frml=formulas[i][[1]], data=data)
    Y <- data$Rating
    X_ps <- model.matrix(frml=formulas[i][[1]], data=cces_groups)[,-1]
  }
  
  
  ## Estimate the models
  ######################
  out <- vector("list", chains)
  for(k in 1:chains)
  {
    ## Estimate model
    out_tmp <- optim(par=rnorm(ncol(X)+2),
                     fn=rr.logpostdens.k4, method="SANN", X=X, Y=Y, 
                     control=list(fnscale=-1, maxit=5000), hessian=FALSE)
    out_tmp <- optim(par=out_tmp$par,
                     fn=rr.logpostdens.k4, method="BFGS", X=X, Y=Y, 
                     control=list(fnscale=-1, maxit=5000), hessian=TRUE)    
    out[[k]] <- MCMCmetrop1R(fun=rr.logpostdens.k4,
                             theta.init=rnorm(length(out_tmp$par), out_tmp$par, 1),
                             mcmc=niter, thin=thin, tune=1,
                             V=-solve(out_tmp$hessian), verbose=round(niter/10),
                             burnin=burnin)
  }
  theta_post_full <- ldply(.data=out, .fun=function(x) x)
  colnames(theta_post_full) <- c("alpha1raw", "alpha2raw", colnames(X))
  theta_post <- theta_post_full
  if(approx > 0) theta_post <- theta_post_full[sample(1:nrow(theta_post_full), size=approx),]
  
  eff <- c()
  ## Calculate estimates w/o post-stratification
  ##############################################
  if(i == 1)
  {
    beta_post <- theta_post[, 3:ncol(theta_post)]
    alpha_post <- cbind(-Inf, 0, 1.2^theta_post[,1], 
                        1.2^theta_post[,1] + 1.2^theta_post[,2], Inf)
    
    for(j in 1:3)
    {
      this_X <- cbind(1, ifelse(j == 2, 1, 0), ifelse(j == 3, 1, 0),
                      c(1, 0, 0), c(0, 1, 0))
      lp <- this_X %*% t(beta_post)
      
      for(k in 1:4)
      {
        pr <- pnorm(alpha_post[, k+1] - lp) - pnorm(alpha_post[, k] - lp)
        pr2 <- colMeans(pr)
        pr2_q <- quantile(pr2, probs=c(.025, .5, .975))
        
        eff <- rbind(eff,
                     data.frame(Formula=i,
                                Treat=j-1,
                                Level=k,
                                Q025=pr2_q[1], Q500=pr2_q[2],
                                Q975=pr2_q[3]))
      }
    }    
  }
  
  
  ## Calculate post-stratified estimates
  ######################################
  if(i == 2)
  {
    SubstEff <- c()
    n_ps <- nrow(cces_groups)
    X_ps <- X_ps[, 5:ncol(X_ps)]
    beta_post <- theta_post[, 3:ncol(theta_post)]
    alpha_post <- cbind(-Inf, 0, 1.2^theta_post[,1], 
                        1.2^theta_post[,1] + 1.2^theta_post[,2], Inf)
    
    for(j in 1:3)
    {
      r_costs <- sample(c(25, 50, 75), size=n_ps, replace=T) 
      bigX <- cbind(1, ifelse(j == 2, 1, 0), ifelse(j == 3, 1, 0),
                    ifelse(r_costs == 25, 1, 0), ifelse(r_costs == 75, 1, 0), X_ps)
      lp <- bigX %*% t(beta_post)
      
      for(k in 1:4)
      {
        pr <- pnorm(alpha_post[, k+1] - lp) - pnorm(alpha_post[, k] - lp)
        pr2 <- apply(X=pr, MARGIN=2, weighted.mean, w=cces_groups$w_raw)
        pr2_q <- quantile(pr2, probs=c(.025, .5, .975))
        
        eff <- rbind(eff,
                     data.frame(Formula=i,
                                Treat=j-1,
                                Level=k,
                                Q025=pr2_q[1], Q500=pr2_q[2],
                                Q975=pr2_q[3]))
      }
    }
  }
  
  
  ## Run a model so that making tables become easier
  ##################################################
  data$Rating <- factor(data$Rating)
  mod_polr <- polr(formula=formulas[i][[1]], Hess=TRUE, data=data)
  
  
  ## Save output
  ##############
  output[[i]] <- list(post_alpha=cbind(1.2^theta_post_full[, 1], 
                                       1.2^theta_post_full[, 1] + 1.2^theta_post_full[, 2]),
                      post_beta=theta_post_full[, 3:ncol(theta_post_full)],
                      SubstEff=eff,
                      mod_polr=mod_polr,
                      Rhat=max(gelman.diag(mcmc.list(llply(.data=out, .fun=function(x) mcmc(x))))$psrf[,1]))
}

  
## Convergence
##############
load("output/Rhat.Rdata")
Rhat <- rbind(Rhat, data.frame(Rhat=c(output[[1]]$Rhat, output[[2]]$Rhat), Which=paste0("Model A2-", 1:2)))
save(Rhat, file="output/Rhat.Rdata")


## Put estimates together for table
###################################
mods_cut_ci <- mods_coef_ci <- mods_coef_m <- mods_cut_m <- mods_tmp <- vector("list", length(output))
for(i in 1:length(output))
{
  mods_tmp[[i]] <- output[[i]]$mod_polr
  mods_coef_m[[i]] <- colMeans(output[[i]]$post_beta)
  mods_cut_m[[i]] <- colMeans(output[[i]]$post_alpha)
  mods_coef_ci[[i]] <- colQuantiles(as.matrix(output[[i]]$post_beta), probs=c(.025, .975))
  mods_cut_ci[[i]] <- colQuantiles(as.matrix(output[[i]]$post_alpha), probs=c(.025, .975))
}

tab <- stargazer(type="latex", style="ajps",
                 mods_tmp, coef=mods_coef_m, ci.custom=mods_coef_ci, 
                 ci=TRUE, 
                 label="",
                 p.auto=FALSE, report="vcs", table.placement="!ht",
                 covariate.labels=c("Security interests",
                                    "Economic interests", "Cost \\$25m", "Cost \\$75m", 
                                    "Age", "Gender", "High education", "Ideology"),
                 column.labels=c("Bare specification", "With covariates"), 
                 font.size="footnotesize", dep.var.labels="Support for foreign aid",
                 omit="constant", model.names=FALSE, model.numbers=FALSE,
                 digits=2, notes="", df=FALSE, omit.stat=c("ll", "adj.rsq", "f", "rsq", "ser"))
which_row <- which(grepl(x=tab, pattern="N &", fixed=) == TRUE)
tab <- c(tab[1:(which_row-1)], 
         paste0(" Intercept & $", round(mods_coef_m[[1]]["(Intercept)"], di=2), "$ & $",
                round(mods_coef_m[[2]]["(Intercept)"], di=2), "$ \\\\"), 
         paste0("  & $(", round(mods_coef_ci[[1]][1, 1], di=2), ",$ $ ", round(mods_coef_ci[[1]][1, 2], di=2), ")$ & $(",
                round(mods_coef_ci[[2]][1, 1], di=2), ", $ $ ", round(mods_coef_ci[[2]][1, 2], di=2), ")$ \\\\"),
         paste0(" Cut 1 & $", round(mods_cut_m[[1]][1], di=2), "$ & $",
                round(mods_cut_m[[2]][1], di=2), "$ \\\\"), 
         paste0("  & $(", round(mods_cut_ci[[1]][1, 1], di=2), ",$ $ ", round(mods_cut_ci[[1]][1, 2], di=2), ")$ & $(",
                round(mods_cut_ci[[2]][1, 1], di=2), ", $ $ ", round(mods_cut_ci[[2]][1, 2], di=2), ")$ \\\\"),
         paste0(" Cut 2 & $", format(round(mods_cut_m[[1]][2], di=2), nsmall=2), "$ & $",
                format(round(mods_cut_m[[2]][2], di=2), nsmall=2), "$ \\\\"), 
         paste0("  & $(", round(mods_cut_ci[[1]][2, 1], di=2), ",$ $ ", round(mods_cut_ci[[1]][2, 2], di=2), ")$ & $(",
                round(mods_cut_ci[[2]][2, 1], di=2), ", $ $ ", round(mods_cut_ci[[2]][2, 2], di=2), ")$ \\\\"),
         "\\hline \\\\[-1.8ex] ", 
         tab[which_row],
         "\\end{tabular} ",
         "\\caption{\\textbf{Posterior summaries for the survey analysis; September data.} Each column depicts the results for 
               a separate model. The standalone number is the median posterior
         estimate, the range below it the 95\\% central credible interval.}\\label{TabA2:RR-Sept}",
         "\\end{table} ")
tab <- tab[-5]
tab <- str_replace(string=tab, pattern="\\[t\\] ", replacement="\\[!ht\\] ")
write(tab, file="output/tables/a2_rr_september.tex")


## Save effects estimates to disc
#################################
eff <- ldply(.data=output, .fun=function(x) x$SubstEff)
eff[, 4:6] <- round(eff[, 4:6], di=2)
eff$Just <- "Control"
eff$Just[eff$Treat == 1] <- "NSJust"
eff$Just[eff$Treat == 2] <- "EconJust"
write.csv(eff, file="output/tables/RR-September-Effects.csv")


## Make graphs
##################
for(i in 1:2)
{
  nam <- ifelse(i == 1, "NoCovs", "PStratified")
  tmp <- output[[i]]$SubstEff
  tmp$Level2 <- "Strongly oppose"
  tmp$Level2[tmp$Level == 2] <- "Oppose"
  tmp$Level2[tmp$Level == 3] <- "Support"
  tmp$Level2[tmp$Level == 4] <- "Strongly support"
  tmp$Level2 <- factor(tmp$Level2, levels=c("Strongly oppose", "Oppose",
                                            "Support", "Strongly support"))
  
  tmp$Treat2 <- "No justification"
  tmp$Treat2[tmp$Treat == 1] <- "Security interests"
  tmp$Treat2[tmp$Treat == 2] <- "Economic interests"
  tmp$Treat2 <- factor(tmp$Treat2, levels=c("No justification", "Economic interests",
                                            "Security interests"))
  
  g <- ggplot(data=tmp, aes(x=Level2, y=Q500, ymin=Q025, ymax=Q975,
                            group=Treat2, colour=Treat2))
  g <- g + geom_hline(yintercept=0, size=.4)
  g <- g + geom_linerange(position=position_dodge(width=.25), size=.9)
  g <- g + geom_point(position=position_dodge(width=.25), size=3)
  g <- g + xlab("") + theme_bw() 
  g <- g + scale_colour_manual(values=c("black", "grey60", "grey80"))
  g <- g + ylab("Probability") + ggtitle("Support for aid to human rights violator (September survey)")
  g <- g + guides(colour=guide_legend(title="Justification\nTreatment"))
  g <- g + theme(axis.text = element_text(size=rel(1)),
                 panel.grid.major.y = element_line(size=0.25, colour="grey80", 
                                                   linetype="dashed"),
                 panel.grid.major.x = element_blank(),               
                 panel.grid.minor = element_blank(),
                 axis.ticks = element_blank(),
                 strip.text = element_text(size=rel(1.2), hjust=0),
                 strip.background = element_blank(),
                 plot.title = element_text(size=rel(1.45), face="bold", hjust=0))
  ggsave(file=paste0("output/figures/A2-September-", nam, ".pdf"), width=11, height=5.5)
}  
  
  
  


  